suppressPackageStartupMessages(library(tidyverse))
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities

# Settings ----

# Directory on an external volume.
# NOTE(review): `data_dir` is not referenced in the visible part of this
# script; presumably the raw-data location -- confirm before removing.
data_dir <- "/Volumes/Mitsu_NGS_3/METTL2A/"

# Project root. Every input and output path below is built from `wd`,
# which keeps its trailing slash so that paste0() yields valid paths.
wd <- "/Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/"
setwd(wd)

# Output locations for tables and figures.
tabledir <- paste0(wd, "Tables/Espresso/")
figdir <- paste0(wd, "Figures/Espresso/m3Ctranscripts/")

# Default ggplot2 theme for every plot created after this point:
# classic theme with a 7 pt base font and the legend below the panel.
theme_set(theme_classic(base_size = 7) + theme(legend.position = "bottom"))

# Read data ----

# Transcript-level DESeq2 results table (tab-separated); one row per
# transcript. Column types are guessed by readr.
espresso_deseq2 <-
  paste0(wd, "Tables/Espresso/espresso_deseq2_genetype2_isDET_2024-04-01.tsv") |>
  read_tsv()
## Rows: 36717 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): transcript_id, transcript_type, transcript_name, gene_id, gene_typ...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview its dimensions and first rows.
espresso_deseq2
## # A tibble: 36,717 × 29
##    transcript_id     transcript_type transcript_name gene_id gene_type gene_name
##    <chr>             <chr>           <chr>           <chr>   <chr>     <chr>    
##  1 ENST00000498442.1 retained_intron CRBN-212        ENSG00… protein_… CRBN     
##  2 ENST00000459840.5 retained_intron CRBN-205        ENSG00… protein_… CRBN     
##  3 ENST00000231948.9 protein_coding  CRBN-201        ENSG00… protein_… CRBN     
##  4 ENST00000432408.6 protein_coding  CRBN-203        ENSG00… protein_… CRBN     
##  5 ENST00000339437.… protein_coding  TRNT1-203       ENSG00… protein_… TRNT1    
##  6 ENST00000488263.5 retained_intron CRBN-209        ENSG00… protein_… CRBN     
##  7 ENST00000420393.5 protein_coding  TRNT1-207       ENSG00… protein_… TRNT1    
##  8 ENST00000698415.1 retained_intron TRNT1-230       ENSG00… protein_… TRNT1    
##  9 ENST00000450014.1 protein_coding  CRBN-204        ENSG00… protein_… CRBN     
## 10 ENST00000698416.1 retained_intron TRNT1-231       ENSG00… protein_… TRNT1    
## # ℹ 36,707 more rows
## # ℹ 23 more variables: siMETTL2A_baseMean <dbl>,
## #   siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## #   siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## #   siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## #   siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## #   siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Per-position table (gzipped TSV) keyed by transcript_id and position.
# NOTE(review): judging by the file name these are significant positions
# shared between conditions -- confirm against the upstream script.
methylated_positions <-
  paste0(wd, "Tables/DRS/Positions/common_sig_seqs_in_intensity_up_2024-04-10.tsv.gz") |>
  read_tsv()
## Rows: 605 Columns: 65
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (30): transcript_id, transcript_name, ref_kmer, GMM_cov_type_G, cluster_...
## dbl (35): position, GMM_logit_pvalue_G, KS_dwell_pvalue_G, KS_intensity_pval...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview its dimensions and first rows.
methylated_positions
## # A tibble: 605 × 65
##    transcript_id     transcript_name position ref_kmer GMM_logit_pvalue_G
##    <chr>             <chr>              <dbl> <chr>                 <dbl>
##  1 ENST00000429711.7 RPL32-204            422 GCCCA                 1    
##  2 ENST00000647248.2 RPL35A-211           380 ACCCC                 1    
##  3 ENST00000647248.2 RPL35A-211           381 CCCCT                 1    
##  4 ENST00000389680.2 MT-RNR1-201           43 ACACA                 1    
##  5 ENST00000389680.2 MT-RNR1-201           57 CCCCG                 1    
##  6 ENST00000389680.2 MT-RNR1-201           71 GTTCA                 1    
##  7 ENST00000389680.2 MT-RNR1-201           73 TCACC                 1    
##  8 ENST00000389680.2 MT-RNR1-201           75 ACCCT                 0.777
##  9 ENST00000389680.2 MT-RNR1-201           93 ATCAA                 1    
## 10 ENST00000389680.2 MT-RNR1-201          138 GCTTA                 1    
## # ℹ 595 more rows
## # ℹ 60 more variables: KS_dwell_pvalue_G <dbl>, KS_intensity_pvalue_G <dbl>,
## #   GMM_cov_type_G <chr>, GMM_n_clust_G <dbl>, cluster_counts_G <chr>,
## #   Logit_LOR_G <dbl>, c1_mean_intensity_G <dbl>, c2_mean_intensity_G <dbl>,
## #   c1_median_intensity_G <dbl>, c2_median_intensity_G <dbl>,
## #   c1_sd_intensity_G <dbl>, c2_sd_intensity_G <dbl>, c1_mean_dwell_G <dbl>,
## #   c2_mean_dwell_G <dbl>, c1_median_dwell_G <dbl>, c2_median_dwell_G <dbl>, …
# Lookup table: one row per transcript_id that occurs in
# `methylated_positions`, flagged with ismethylated = "+".
methylated_transcripts <-
  distinct(methylated_positions, transcript_id) |>
  mutate(ismethylated = "+")

# Annotate every transcript in the DESeq2 table with its methylation flag.
# Transcripts absent from `methylated_transcripts` get ismethylated = "-".
#
# The join key is spelled out rather than inferred from the shared column
# names, so the join cannot silently change (and emits no message) if the
# two tables ever gain another column in common. `relationship` makes the
# join fail loudly if the lookup table stops being unique per
# transcript_id, which would otherwise duplicate DESeq2 rows.
espresso_deseq2_m3Cinfo <-
  espresso_deseq2 |>
  left_join(
    methylated_transcripts,
    by = "transcript_id",
    relationship = "many-to-one"
  ) |>
  replace_na(list(ismethylated = "-"))
espresso_deseq2_m3Cinfo
## # A tibble: 36,717 × 30
##    transcript_id     transcript_type transcript_name gene_id gene_type gene_name
##    <chr>             <chr>           <chr>           <chr>   <chr>     <chr>    
##  1 ENST00000498442.1 retained_intron CRBN-212        ENSG00… protein_… CRBN     
##  2 ENST00000459840.5 retained_intron CRBN-205        ENSG00… protein_… CRBN     
##  3 ENST00000231948.9 protein_coding  CRBN-201        ENSG00… protein_… CRBN     
##  4 ENST00000432408.6 protein_coding  CRBN-203        ENSG00… protein_… CRBN     
##  5 ENST00000339437.… protein_coding  TRNT1-203       ENSG00… protein_… TRNT1    
##  6 ENST00000488263.5 retained_intron CRBN-209        ENSG00… protein_… CRBN     
##  7 ENST00000420393.5 protein_coding  TRNT1-207       ENSG00… protein_… TRNT1    
##  8 ENST00000698415.1 retained_intron TRNT1-230       ENSG00… protein_… TRNT1    
##  9 ENST00000450014.1 protein_coding  CRBN-204        ENSG00… protein_… CRBN     
## 10 ENST00000698416.1 retained_intron TRNT1-231       ENSG00… protein_… TRNT1    
## # ℹ 36,707 more rows
## # ℹ 24 more variables: siMETTL2A_baseMean <dbl>,
## #   siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## #   siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## #   siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## #   siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## #   siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Count transcripts per (ismethylated, common_DETs) combination, then
# express each count as a percentage of its methylation class.
# The result deliberately stays grouped by `ismethylated`, as the printed
# header shows.
espresso_deseq2_DET_groupedby_methylation <-
  espresso_deseq2_m3Cinfo |>
  group_by(ismethylated, common_DETs) |>
  summarise(n = n(), .groups = "drop") |>
  group_by(ismethylated) |>
  mutate(percentage = (100 * n) / sum(n))
espresso_deseq2_DET_groupedby_methylation
## # A tibble: 6 × 4
## # Groups:   ismethylated [2]
##   ismethylated common_DETs     n percentage
##   <chr>        <chr>       <int>      <dbl>
## 1 +            down            1       1.18
## 2 +            other          50      58.8 
## 3 +            up             34      40   
## 4 -            down          539       1.47
## 5 -            other       35689      97.4 
## 6 -            up            404       1.10
# Same tabulation as above, additionally split by gene type: counts per
# (ismethylated, genetype2, common_DETs), with percentages computed within
# each (ismethylated, genetype2) pair. The result stays grouped by that
# pair.
espresso_deseq2_DET_groupedby_methylation_genetypes <-
  espresso_deseq2_m3Cinfo |>
  group_by(ismethylated, genetype2, common_DETs) |>
  summarise(n = n(), .groups = "drop") |>
  group_by(ismethylated, genetype2) |>
  mutate(percentage = (100 * n) / sum(n))
espresso_deseq2_DET_groupedby_methylation_genetypes
## # A tibble: 18 × 5
## # Groups:   ismethylated, genetype2 [9]
##    ismethylated genetype2        common_DETs     n percentage
##    <chr>        <chr>            <chr>       <int>      <dbl>
##  1 +            mRNA             down            1      1.39 
##  2 +            mRNA             other          48     66.7  
##  3 +            mRNA             up             23     31.9  
##  4 +            mt-mRNA          up              9    100    
##  5 +            mt-rRNA          up              2    100    
##  6 +            unannotated gene other           2    100    
##  7 -            mRNA             down          526      1.69 
##  8 -            mRNA             other       30186     97.1  
##  9 -            mRNA             up            378      1.22 
## 10 -            mt-mRNA          other           2     50    
## 11 -            mt-mRNA          up              2     50    
## 12 -            mt-tRNA          other           7    100    
## 13 -            other ncRNAs     down           11      0.238
## 14 -            other ncRNAs     other        4590     99.4  
## 15 -            other ncRNAs     up             16      0.347
## 16 -            unannotated gene down            2      0.219
## 17 -            unannotated gene other         904     98.9  
## 18 -            unannotated gene up              8      0.875
# Horizontal 100 %-stacked bar chart: share of down / other / up
# transcripts within each methylation class.
#
# Colours are bound to categories by name. An unnamed vector is matched by
# position, so the colours would shift silently if one category were
# missing from the data.
espresso_deseq2_DET_groupedby_methylation_barplot <-
  espresso_deseq2_DET_groupedby_methylation |>
  ggplot(aes(x = ismethylated, y = n, fill = common_DETs)) +
  geom_col(position = "fill") + # bars rescaled to proportions
  coord_flip() +
  scale_fill_manual(
    values = c(down = "#3e3ef2", other = "grey", up = "#f23e3e")
  ) +
  scale_y_reverse()

# NOTE(review): ggsave_multiple_formats() comes from myUtilities and gets no
# file name here; presumably it derives one from the name of the plot
# object -- confirm before renaming the variable.
espresso_deseq2_DET_groupedby_methylation_barplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 5, height = 3.5, fontsize = 7
  )

# Horizontal 100 %-stacked bar chart of down / other / up shares, one bar
# per (methylation class, gene type) pair.
#
# The x variable is the interaction of both (reversed) factors, and
# ggh4x::guide_axis_nested() splits its "."-delimited labels into a nested
# axis.
#
# Colours are bound to categories by name. An unnamed vector is matched by
# position, so the colours would shift silently if one category were
# missing from the data.
espresso_deseq2_DET_groupedby_methylation_genetypes_barplot <-
  espresso_deseq2_DET_groupedby_methylation_genetypes |>
  ggplot(aes(
    x = interaction(fct_rev(ismethylated), fct_rev(genetype2)),
    y = n,
    fill = common_DETs
  )) +
  geom_col(position = "fill") + # bars rescaled to proportions
  scale_x_discrete(guide = ggh4x::guide_axis_nested(delim = ".")) +
  coord_flip() +
  scale_fill_manual(
    values = c(down = "#3e3ef2", other = "grey", up = "#f23e3e")
  ) +
  scale_y_reverse()

# NOTE(review): ggsave_multiple_formats() comes from myUtilities and gets no
# file name here; presumably it derives one from the name of the plot
# object -- confirm before renaming the variable.
espresso_deseq2_DET_groupedby_methylation_genetypes_barplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 6, height = 5, fontsize = 7
  )
## Warning: The S3 guide system was deprecated in ggplot2 3.5.0.
## ℹ It has been replaced by a ggproto system that can be extended.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.